Both models involve fitting data to a “best fit” line based on observations
Example: X=Time, Y= Accuracy; How does naming accuracy change over time?
In both, the goal of the model is to “fit a line” accounting for all observations of the data and minimize residuals (i.e., minimize error in the fitted model)
Linear Regression
Assumes Independence (observations are independent of each other, i.e., error is not predictable and errors are uncorrelated)
Assumes Homoscedasticity (variance of residuals is constant for any value of x)
Assumes Normal Distribution (y is normally distributed)
Assumes Linearity (relationship between x and y is linear)
Assumptions are much the same, but LME models can account for non-independence using Fixed and Random Effects
Nested/Hierarchical Data
Level 1: Patient, Level 2: Therapist
-Fixed Effect(s): Age, Diagnosis, Treatment Condition
-Random Effect(s): Therapist
Longitudinal Data
Level 1: Time, Level 2: Patient
-Fixed Effect(s): Treatment, Time
-Random Effect(s): Patient
These are the predictor variables in the model (e.g., Time, Treatment). Their impact is “fixed” (i.e., constant) across individuals.
In standard linear models, there is only one random effect (i.e., random variance)
Mixed models can account for random variance from more than one source in the data
Random effects (random intercepts here) will allow the performance to vary for each patient
Random Intercept
Pros
-Accounts for nested or longitudinal data
-Robust to (some) missing data
-Can be used with relatively small N (<50)
Cons
-Complicated
-More difficult to explain and interpret
library(reshape2)
library(tidyverse)
library(stats)
library(psych)
library(readr)
library(knitr)
library(tibble)
library(readr)
library(outliers)
library(corrplot)
library(magrittr)
library(qwraps2)
library(arsenal)
library(naniar)
library(boot)
library(lme4)
library(lattice)
library(lmerTest)
library(psych)
library(doBy)
library(car)
library(DescTools)
library(sjstats)
library(ggplot2)
library(rstatix)
# Minimal black-and-white ggplot theme shared by every plot below:
# black axis lines, no grid, no panel border/background, no legend title.
max.theme <- theme_bw() +
  theme(
    axis.line        = element_line(colour = "black"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.border     = element_blank(),
    panel.background = element_blank(),
    legend.title     = element_blank()
  )
Data Overview: 10 patients with Primary Progressive Aphasia (svPPA, lvPPA, Alzheimer’s Disease) participated in a two-year language treatment study. Participants were provided a finite set of target words for treatment (N=100) and control words (N=100). Participants trained on Treatment words weekly. Every six months, patients were assessed on naming performance (Treatment and Control words) and neuropsychological performance (BNT, PPT, Digit Span, etc.)
We are interested in comparing Treatment vs. Control words over time to see if Treatment words were maintained vs. Control words.
Predictor Variable(s): Time (T1, T2, T3, T4), Condition (Train, Control)
Outcome Variable(s): Accuracy (% named correctly)
# Load the master dataset.
# NOTE(review): setwd() in a script ties it to one machine's folder
# layout — an RStudio project or here::here() would be more portable.
setwd("~/Documents/RData/LME Tutorial")
LexDropMaster <- read_csv("LexDropMaster.csv")

# Observe data
print(LexDropMaster)

# Remove variables not used in this analysis. Remaining columns are
# referenced by position later, so these indices must stay in sync
# with the CSV layout.
Less <- LexDropMaster[-c(1, 3, 4, 5, 7, 16, 18, 19, 20, 22, 23, 31)]
#### Group by Patient, Timepoint, and Condition ####
# Average every measure within each Patient x TimePoint x Condition cell.
# across() replaces the superseded summarize_at()/vars(); the selection
# 1:12 counts non-grouping columns, exactly as vars(c(1:12)) did.
# na.rm = TRUE skips NAs in each variable.
RearrangeLess <- Less %>%
  group_by(Patient, TimePoint, Condition) %>%
  summarize(across(1:12, ~ mean(.x, na.rm = TRUE)), .groups = "drop_last")

#### Remove T5 (the fifth timepoint is not analyzed) ####
RearrangeLess <- subset(RearrangeLess, TimePoint != "T5")
RearrangeLess
# Mean and SD of accuracy for each Condition x TimePoint cell.
# .groups = "drop" returns an ungrouped table so downstream use (and the
# print) is not silently still grouped by Condition.
SummaryStats <- RearrangeLess %>%
  group_by(Condition, TimePoint) %>%
  summarize(mean = mean(Acc, na.rm = TRUE),
            sd   = sd(Acc, na.rm = TRUE),
            .groups = "drop")
SummaryStats
## Note: check for missing data before interpreting these means.
# Across patients: mean accuracy by timepoint, colored by Condition,
# with SE error bars and lines connecting timepoints per condition.
TotalAccMeansPlot2 <- ggplot(RearrangeLess,
                             aes(TimePoint, Acc,
                                 color = Condition, fill = Condition)) +
  stat_summary(fun.data = mean_se, geom = "point", size = 2) +
  stat_summary(fun.data = mean_se, geom = "line", size = .5,
               aes(group = Condition)) +
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  max.theme
print(TotalAccMeansPlot2)
# Each patient: jittered individual accuracy points with a crossbar at
# the mean for every TimePoint x Condition cell.
# fun/fun.min/fun.max replace fun.y/fun.ymin/fun.ymax, which were
# deprecated in ggplot2 3.3.0.
ggplot(RearrangeLess, aes(TimePoint, Acc, shape = TimePoint, fill = Condition)) +
  geom_point(shape = 21, color = "black", size = 5, alpha = 0.6,
             position = position_jitter(w = 0.03, h = 0)) +
  stat_summary(fun = mean, fun.min = mean, fun.max = mean,
               geom = "crossbar", color = "black", size = 0.4, width = 0.3) +
  max.theme
# Linear mixed-effects model: fixed effects of TimePoint, Condition, and
# their interaction; a random intercept for each Patient absorbs
# between-patient baseline differences.
betterAccmodel <- lmer(Acc ~ TimePoint + Condition + TimePoint:Condition +
                         (1 | Patient),
                       data = RearrangeLess)

# Diagnostic 1: outliers among the studentized residuals
outlierTest(betterAccmodel)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferroni p
## 18 2.296081 0.025579 NA

# Diagnostic 2: normality of residuals (Q-Q plot)
qqmath(betterAccmodel)
# Note: observed residual quantiles (Y) vs. theoretical quantiles (X).
# Quantiles drawn from the same distribution form a roughly straight
# line. Non-normality can be fixed with transformations, as in standard
# linear models.

# Diagnostic 3: homoscedasticity (residuals vs. fitted values)
plot(betterAccmodel)
# Note: studentized residuals (Y) vs. fitted, i.e. model-predicted,
# values (X). No pattern should be discernible; a U-shape, for example,
# would suggest a polynomial relationship.
# Omnibus test of the fixed effects. The identical model was already fit
# above, so the redundant second lmer() call is removed. With lmerTest
# attached, anova() on a lmer fit reports F-tests with Satterthwaite
# degrees of freedom.
anova(betterAccmodel)
# Comparison: classical repeated-measures ANOVA on the same design, to
# contrast its listwise deletion with the LME model's handling of
# missing data.
# NOTE(review): absolute path — assumes this exact machine/layout.
FixedWide<- read_csv("~/Documents/Professional/Talks/RANOVA.csv") #Read in data
gg_miss_var(FixedWide) #Plot Missingness
# rstatix::anova_test — within-subjects ANOVA: Acc as outcome (dv),
# Patient as subject id (wid), Condition and TimePoint as within factors.
res.aov <- anova_test(
data = FixedWide, dv = Acc, wid = Patient,
within = c(Condition, TimePoint))
# Console output from the call above (rows with NAs are listwise-deleted):
## Warning: NA detected in rows: 10,12,14,15,16,42,44,45,46,48,73,76,78,79,80.
## Removing this rows before the analysis.
get_anova_table(res.aov)
#See how DFs are impacted due to missingness/Listwise Deletion. Example: Main effect of Treatment, df(1,6)?
### Clean data: difference scores (T4 - T1) per Patient x Condition
# Keep only the first and last assessments.
DifPrep <- subset(RearrangeLess, TimePoint != "T2" & TimePoint != "T3")
# Require both timepoints within a Patient x Condition cell, then take
# T4 minus T1. order_by = TimePoint makes the lag explicit instead of
# silently relying on the incoming row order.
DifPrep2 <- DifPrep %>%
  group_by(Patient, Condition) %>%
  filter(n() > 1) %>%
  mutate(Difference = Acc - lag(Acc, order_by = TimePoint))
# Drop the per-measure columns (positions 4:15), keeping
# Patient/TimePoint/Condition/Difference.
DifPrep2 <- DifPrep2[-c(4:15)]
# Remove the T1 rows, where the lagged difference is NA.
DifPrep2 <- DifPrep2[complete.cases(DifPrep2), ]
# S16 is excluded from this contrast.
DifPrep2 <- subset(DifPrep2, Patient != "S16")
### Summary stats for the difference scores, by Condition
# funs() is deprecated in dplyr (>= 0.8), and se() is not a base or
# dplyr function (its source was one of the attached packages), so the
# statistics are computed explicitly. Column names (mean, se, sd) match
# what the plot below expects; vars(3) previously selected Difference.
NewcatStats <- DifPrep2 %>%
  group_by(Condition) %>%
  summarize(mean = mean(Difference, na.rm = TRUE),
            sd   = sd(Difference, na.rm = TRUE),
            se   = sd / sqrt(sum(!is.na(Difference))),
            .groups = "drop") %>%
  as.data.frame()

#### Make plot of means by Condition ####
colvec <- c("darkorange","lightblue")
NewCatPlot2 <- ggplot(NewcatStats, aes(Condition, mean, fill = Condition)) +
  geom_point(shape = 21, size = 2.3) +  # stray trailing comma removed
  # fun replaces fun.y, deprecated in ggplot2 3.3.0
  geom_bar(stat = "summary", fun = "mean", alpha = .6) +
  coord_cartesian(ylim = c(-.24, .01)) +
  scale_fill_manual(values = colvec) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.01) +
  scale_x_discrete(limits = c("train","control")) +
  max.theme
print(NewCatPlot2)
### Paired t-test on the difference scores: Train vs. Control
# NOTE(review): with the formula interface, pairing relies on row order
# within each Condition (rows arrive sorted Patient-within-Condition
# from the prep above) — confirm this still holds if the data prep
# changes.
DFtest <- t.test(Difference ~ Condition, data = DifPrep2, paired=TRUE)
DFtest
# Console output from the call above:
##
## Paired t-test
##
## data: Difference by Condition
## t = -4.3908, df = 6, p-value = 0.004614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.29074647 -0.08265529
## sample estimates:
## mean of the differences
## -0.1867009
#### Effect size (i.e., magnitude of difference between variables) ####
library(effsize)
# psych (attached earlier) also exports a cohen.d(); qualify the call so
# the result does not depend on package attach order.
EffSize <- effsize::cohen.d(DifPrep2$Difference ~ DifPrep2$Condition, paired = TRUE)
EffSize
# Console output from the call above:
##
## Cohen's d
##
## d estimate: -1.681031 (large)
## 95 percent confidence interval:
## lower upper
## -2.976799 -0.385263
### Prep data for the correlation analysis (reload the full dataset)
LexDropMaster <- read_csv("LexDropMaster.csv")
Less <- LexDropMaster[-c(1,3,4,5,7,16,18,19,20,22,23,31)]
# Keep the first ten measures plus column 12.
Less2 <- Less[c(1:10, 12)]

### Collapse to one row per patient: mean of each measure.
# across() replaces the superseded summarize_at()/vars(); the selection
# 1:10 counts non-grouping columns, exactly as vars(c(1:10)) did.
CorrPrep <- Less2 %>%
  group_by(Patient) %>%
  summarize(across(1:10, ~ mean(.x, na.rm = TRUE)))
# Drop Patient and the first measure, leaving only the numeric
# variables to be correlated.
CorrPrep <- CorrPrep[-c(1, 2)]
#### Run correlations ####
# Pairwise Pearson correlations over the per-patient means.
# NOTE(review): cor() defaults to use = "everything" — any NA remaining
# in CorrPrep propagates to NA cells; confirm the data are complete here.
CorrPlot <-cor(CorrPrep, method = "pearson")
# corrplot::cor.mtest — significance tests (p-values, CIs) for each pair
sig <- cor.mtest(CorrPrep, conf.level = .95) ##run correlations for each variable
###Make the Correlation Plot
# Diverging red-white-blue palette generator (red = negative r).
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
# Upper triangle only, variables ordered by hierarchical clustering.
cont.plot2<- corrplot(CorrPlot, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance: blank out cells with p >= .05
p.mat = sig$p, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE )
####Show P-values for your reference#######
# Build a long-format table of pairwise correlations with p-values.
# Input: a data frame of numeric variables. Returns one row per ordered
# variable pair with columns rowvars, colvars, corr, p.value.
# Fixes: `<-` instead of `=` for the assignment; the parameter no longer
# shadows the global CorrPlot matrix; the join keys are explicit, which
# also silences left_join's "Joining, by" message.
corr.data <- function(df) {
  # Correlation matrix
  cor.vals <- cor(df)
  # Matching p-value matrix (corrplot::cor.mtest)
  cor.p <- cor.mtest(df, conf.level = 0.95)$p
  rownames(cor.p) <- rownames(cor.vals)
  colnames(cor.p) <- colnames(cor.vals)
  # Long-format correlations joined with long-format p-values
  cbind(rowvars = rownames(cor.vals), data.frame(cor.vals)) %>%
    gather(colvars, corr, -rowvars) %>%
    left_join(cbind(rowvars = rownames(cor.p), data.frame(cor.p)) %>%
                gather(colvars, p.value, -rowvars),
              by = c("rowvars", "colvars"))
}
# Tile plot of every variable pair: correlation on top, p-value
# (parenthesized) underneath.
corr.data(CorrPrep) %>%
  ggplot(aes(colvars, fct_rev(rowvars))) +
  geom_tile(colour = "grey70", fill = NA) +
  geom_text(aes(label = sprintf("%1.2f", corr)),
            position = position_nudge(y = 0.2),
            size = 3, colour = "grey20") +
  geom_text(aes(label = sprintf("(%1.2f)", p.value)),
            position = position_nudge(y = -0.2),
            size = 2.5, colour = "grey20") +
  labs(x = "", y = "") +
  theme_classic() +
  coord_fixed()
library(gridExtra)
# Create each correlation scatter plot.
# Columns are referenced through ggplot's data masking (.data[[...]]);
# the previous CorrPrep$col form inside aes() bypasses the data argument
# and breaks faceting/grouping. A helper avoids repeating the identical
# layer stack six times. (plot1's fill = "purple" was a no-op with the
# default point shape and is dropped.)
corr_scatter <- function(xvar, yvar) {
  ggplot(CorrPrep, aes(x = .data[[xvar]], y = .data[[yvar]])) +
    geom_smooth(method = lm, se = FALSE, color = "grey") +
    geom_point(size = 2, color = "steelblue3") +
    max.theme +
    labs(x = xvar, y = yvar)
}
plot1 <- corr_scatter("BNT", "Acc")
plot2 <- corr_scatter("PPTpics", "Acc")
plot3 <- corr_scatter("PPTwords", "Acc")
plot4 <- corr_scatter("BNT", "PPTpics")
plot5 <- corr_scatter("BNT", "PPTwords") +
  scale_y_continuous(breaks = c(15, 20, 25))
plot6 <- corr_scatter("PPTpics", "PPTwords")
# Put them all in one grid
grid.arrange(plot1, plot5, plot2, plot6, plot3, plot4, nrow = 3)